loading all the libraries that will be needed in this analysis.
# general
library(tidyverse)
library(readr)
library(readxl)
library(markdown)
library(rmarkdown)
library(DT)
library(cowplot)
library(ggrepel)
library(RColorBrewer)
# alignment, DE, annotation
library(DESeq2)
# BiocManager::install("biomaRt")
library(biomaRt)
# BiocManager::install("org.Mm.eg.db")
library(org.Mm.eg.db)
# data exploration and analysis
library(clusterProfiler)
library(DOSE)
library(plotly)
library(GeneTonic)
# BiocManager::install("pcaExplorer")
library(pcaExplorer)
library(visNetwork)
library(magrittr)
# install.packages("enrichR")
library(enrichR)
# install.packages("pheatmap")
library(pheatmap)
library(enrichplot)
# install.packages("igraph")
library(igraph)
library(visNetwork)
library(magrittr)
# BiocManager::install("apeglm")
library(apeglm)
Defining a custom function for the volcano plots (to be able to annote genes belonging to a cluster of gene sets supplied as a character vector)
volc_distilled <- function(exp_thres, cluster, distilled, data){
volcano <- data %>%
select(log2FoldChange, padj, geneSymbol, id) %>%
arrange(desc(log2FoldChange)) %>%
mutate(plog10 = -log(padj)) %>%
na.omit()
target_cluster <- distilled$res_enrich %>%
filter(gs_membership == cluster) %>%
select(gs_genes) %>%
as_vector() %>%
str_split(",") %>%
unname() %>%
unlist()
volcano_target <- volcano %>%
filter(geneSymbol %in% target_cluster)
## Plot function for the volcano plot standard volcano
p <- ggplot(mapping = aes(x = log2FoldChange, y = `plog10`)) +
# general scatter plot with all the genes
geom_point(data = volcano %>%
filter(
log2FoldChange >= exp_thres | log2FoldChange <= -exp_thres
),
size = 3,
alpha = 0.5,
color = "grey"
) +
geom_point(data = volcano %>%
filter(
(log2FoldChange <= exp_thres & log2FoldChange >= 0) |
(log2FoldChange >= -exp_thres & log2FoldChange <= 0)
),
size = 3,
alpha = 0.01,
color = "grey"
) +
theme_minimal() +
geom_vline(aes(xintercept = 0)) +
geom_vline(aes(xintercept = exp_thres)) +
geom_vline(aes(xintercept = -exp_thres)) +
theme( axis.line.x = element_line(color="black"),
axis.title.x = element_text(size=12, face="bold",
margin = margin(t = 7, b = 5)),
axis.title.y = element_text(size=12, face="bold",
margin = margin(r = 11, l = 5)),
axis.text.x = element_text(size=11, face="bold"),
axis.text.y = element_text(size=10, face="bold")
) +
guides(color = FALSE) +
scale_color_manual(values = c("#A0A09F", "#A7CFEE")) +
labs(y = "BH-adjusted P (-log10)",
x = "log2(fold change)"
) +
xlim(-10, 10) +
coord_fixed(ratio = 1/50) +
geom_point(data = volcano_target %>%
filter(
log2FoldChange >= exp_thres | log2FoldChange <= -exp_thres
),
color = "#BA2B34",
alpha = 0.8,
size = 4
) +
geom_point(data = volcano_target %>%
filter(
(log2FoldChange <= exp_thres & log2FoldChange >= 0) |
(log2FoldChange >= -exp_thres & log2FoldChange <= 0)
),
color = "#BA2B34",
alpha = 0.1,
size = 3
) +
geom_text_repel(data = volcano_target %>%
filter(
log2FoldChange >= exp_thres | log2FoldChange <= -exp_thres
),
aes(label = geneSymbol, x = log2FoldChange-0.1),
size = 3, hjust = 1, vjust = 0, max.overlaps = 15
)
return(p)
}
volc_distilled2 <- function(exp_thres, gs_ids, data, res_enrich, add){
volcano <- data %>%
select(log2FoldChange, padj, geneSymbol, id) %>%
arrange(desc(log2FoldChange)) %>%
mutate(plog10 = -log(padj)) %>%
na.omit()
target <- res_enrich %>%
filter(gs_id %in% gs_ids) %>%
select(gs_genes) %>%
as_vector() %>%
strsplit(",") %>%
unlist() %>%
unname() %>%
c(., add)
volcano_target <- volcano %>%
filter(geneSymbol %in% target)
## Plot function for the volcano plot standard volcano
p <- ggplot(mapping = aes(x = log2FoldChange, y = `plog10`)) +
# general scatter plot with all the genes
geom_point(data = volcano %>%
filter(
log2FoldChange >= exp_thres | log2FoldChange <= -exp_thres
),
size = 3,
alpha = 0.5,
shape = 21
) +
geom_point(data = volcano %>%
filter(
(log2FoldChange <= exp_thres & log2FoldChange >= 0) |
(log2FoldChange >= -exp_thres & log2FoldChange <= 0)
),
size = 3,
alpha = 0.01,
shape = 21
) +
theme_minimal() +
geom_vline(aes(xintercept = 0)) +
geom_vline(aes(xintercept = exp_thres)) +
geom_vline(aes(xintercept = -exp_thres)) +
theme( axis.line.x = element_line(color="black"),
axis.title.x = element_text(size=12, face="bold",
margin = margin(t = 7, b = 5)),
axis.title.y = element_text(size=12, face="bold",
margin = margin(r = 11, l = 5)),
axis.text.x = element_text(size=11, face="bold"),
axis.text.y = element_text(size=10, face="bold")
) +
guides(color = FALSE) +
scale_color_manual(values = c("#A0A09F", "#A7CFEE")) +
labs(y = "BH-adjusted P (-log10)",
x = "log2(fold change)"
) +
xlim(-10, 10) +
coord_fixed(ratio = 1/50) +
geom_point(data = volcano_target %>%
filter(
log2FoldChange >= exp_thres | log2FoldChange <= -exp_thres
),
color = "#BA2B34",
alpha = 1,
size = 3
) +
geom_point(data = volcano_target %>%
filter(
(log2FoldChange <= exp_thres & log2FoldChange >= 0) |
(log2FoldChange >= -exp_thres & log2FoldChange <= 0)
),
color = "#BA2B34",
alpha = 0.1,
size = 3
) +
geom_text_repel(data = volcano_target %>%
filter(
log2FoldChange >= exp_thres | log2FoldChange <= -exp_thres
),
aes(label = geneSymbol, x = log2FoldChange-0.1),
size = 3, hjust = 1, vjust = 0, max.overlaps = 15
)
return(p)
}
Reading in the gene tonic data set (containing DESeq2 results as well as GO ORA analysis)
data <- readRDS("gtl_condition_g2_vs_g1.rds")
Providing an annotation file for the PCA Explorer, acquiring the consensus gene names per ENSEMBL ID
anno <- get_annotation(data$dds,
biomart_dataset = "mmusculus_gene_ensembl",
idtype = "ensembl_gene_id")
GeneTonic(gtl = data,
project_id = "T8_I4vGFP")
pcaExplorer(dds = data$dds,
annotation = anno)
Identifying differentially expressed gene sets between the two conditions. DESeq2 results as well as the result of ORA analysis of GO gene sets for the present dataset are provided. Here, we will have a look at the top 30 differentially expressed GO sets.
p <- enhance_table(gtl = data,
n_gs = 30,
chars_limit = 60)
ggplotly(p)
# ggsave("Go_overview.pdf")
Reporting the top 100 genesets for later lookup as an interactive table.
data$res_enrich %>%
head(n = 100)%>%
datatable()
datatable(data$res_enrich)
We can also “distill” this data (GeneTonic) as a markov clustering to get an idea of geneset clusters that belong together.
distilled <- distill_enrichment(gtl = data,
n_gs = 100,
cluster_fun = "cluster_markov")
distilled$distilled_table %>%
datatable()
plotting these new clusters in an interactive network.
# storing the clustered results for later
dg <- distilled$distilled_em
# defining a color palette for nicer display
colpal <- colorspace::rainbow_hcl(length(unique(V(dg)$color)))[V(dg)$color]
V(dg)$color.background <- scales::alpha(colpal, alpha = 0.8)
V(dg)$color.highlight <- scales::alpha(colpal, alpha = 1)
V(dg)$color.hover <- scales::alpha(colpal, alpha = 0.5)
V(dg)$color.border <- "black"
# ploting an interactive network that colors the genesets according to the supercluster
visNetwork::visIgraph(dg) %>%
visOptions(highlightNearest = list(enabled = TRUE, degree = 1, hover = TRUE),
nodesIdSelection = TRUE,
selectedBy = "membership")
# ggsave("visNet_cluster.pdf")
# generate the vst transformed data from the DESeq2 results
vst_data <- DESeq2::vst(data$dds)
c_3 <- c("GO:0030890", "GO:0042110", "GO:0050853", "GO:0002313", "GO:0030183")
# then, generate the tabbed heatmap output
a1 <- gs_heatmap(vst_data,
data$res_de,
data$res_enrich,
data$annotation_obj,
geneset_id = c_3[1],
cluster_columns = TRUE)
a2 <- gs_heatmap(vst_data,
data$res_de,
data$res_enrich,
data$annotation_obj,
geneset_id = c_3[2],
cluster_columns = TRUE)
a3 <- gs_heatmap(vst_data,
data$res_de,
data$res_enrich,
data$annotation_obj,
geneset_id = c_3[3],
cluster_columns = TRUE)
a4 <- gs_heatmap(vst_data,
data$res_de,
data$res_enrich,
data$annotation_obj,
de_only = T,
geneset_id = c_3[4],
cluster_columns = TRUE)
a5 <- gs_heatmap(vst_data,
data$res_de,
data$res_enrich,
data$annotation_obj,
de_only = T,
geneset_id = c_3[5],
cluster_columns = TRUE)
a1
# ggsave("c3_1.pdf")
a2
# ggsave("c3_2.pdf")
a3
# ggsave("c3_3.pdf")
a4
# ggsave("c3_4.pdf")
a5
# ggsave("c3_5.pdf")
Then, I will try to generate a vocanoplot that highlights the genes involved in the genesets mentioned above.
volcano_T8 <- read_excel("out_dennis_leukemia_condition_g2_vs_g1_tbl_res_DESeq.xlsx")
volc_distilled2(
data = volcano_T8,
res_enrich = data$res_enrich,
exp_thres = 1,
gs_ids = c_3,
add = c("Vpreb1", "Vpreb2", "Igll3", "Igll1", "Ccnd3", "Ccnd2")
)
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: ggrepel: 25 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
ggsave("c3_T8.pdf")
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: ggrepel: 26 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
generating the same volcano plot with the data from T11
# data loading and cleaning of the available T11 data set
volcano_T11 <- read_delim("volcano.csv", delim = ";",
escape_double = FALSE, locale = locale(decimal_mark = ","),
trim_ws = TRUE)
## Rows: 12122 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ";"
## chr (1): geneSymbol
## dbl (6): log2FoldChange, pvalue, i, (i/m)Q, adjusted P, adjusted P -log10
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
volcano_T11 <- volcano_T11 %>%
select(log2FoldChange, 'adjusted P -log10', geneSymbol) %>%
na.omit()
# create geneset vector that combines these four GO genesets from cluster 3 (including some other B cell markers)
target <- data$res_enrich %>%
filter(
gs_id %in% c("GO:0030183", "GO:0002313", "GO:0050853", "GO:0030890", "GO:0042110")
) %>%
select(gs_genes) %>%
as_vector() %>%
strsplit(",") %>%
unlist() %>%
unname() %>%
c(., "Vpreb1", "Vpreb2", "Igll3", "Igll1", "Ccnd3", "Ccnd2")
# creating
volcano_interest_T11 <- volcano_T11 %>%
filter(geneSymbol %in% target)
# generating a vector containing all the highly regulated genes in the target geneset
volcano_interest_hi_T11 <- volcano_interest_T11 %>%
filter(log2FoldChange >= 1 | log2FoldChange <= -1)
## Plot function for the volcano plot
ggplot(mapping = aes(x = log2FoldChange, y = `adjusted P -log10`)) +
# general scatter plot with all the genes
geom_point(data = volcano_T11 %>%
filter(
log2FoldChange >= 1 | log2FoldChange <= -1
),
size = 3,
alpha = 0.5,
shape = 21
) +
geom_point(data = volcano_T11 %>%
filter(
(log2FoldChange <= 1 & log2FoldChange >= 0) |
(log2FoldChange >= -1 & log2FoldChange <= 0)
),
size = 3,
alpha = 0.01,
shape = 21
) +
# highlighting the most important genes
geom_point(data = volcano_interest_T11 %>%
filter(
log2FoldChange >= 1 | log2FoldChange <= -1
),
color = "#BA2B34",
alpha = 1,
size = 3
) +
geom_point(data = volcano_interest_T11 %>%
filter(
(log2FoldChange <= 1 & log2FoldChange >= 0) |
(log2FoldChange >= -1 & log2FoldChange <= 0)
),
color = "#BA2B34",
alpha = 0.1,
size = 3
) +
# annotating the highlighted genes
geom_text_repel(data = volcano_interest_T11 %>%
filter(
log2FoldChange >= 1 | log2FoldChange <= -1
),
aes(label = geneSymbol, x = log2FoldChange-0.1),
size = 3, hjust = 1, vjust = 0, max.overlaps = 15
) +
theme_minimal() +
geom_vline(aes(xintercept = 0)) +
geom_vline(aes(xintercept = 1)) +
geom_vline(aes(xintercept = -1)) +
theme( axis.line.x = element_line(color="black"),
axis.title.x = element_text(size=12, face="bold",
margin = margin(t = 7, b = 5)),
axis.title.y = element_text(size=12, face="bold",
margin = margin(r = 11, l = 5)),
axis.text.x = element_text(size=11, face="bold"),
axis.text.y = element_text(size=10, face="bold")
) +
# theme_bw() +
# theme(panel.grid.major = element_blank(),
# panel.grid.minor = element_blank(),
# panel.border = element_blank(),
# axis.line = element_line(colour = "black")
# ) +
guides(color = FALSE) +
scale_color_manual(values = c("#A0A09F", "#A7CFEE")) +
labs(y = "BH-adjusted P (-log10)",
x = "log2(fold change)"
) +
xlim(-6, 6) +
coord_fixed(ratio = 1/10)
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
# ggsave("c3_T11_volc.pdf")
c_11 <- c("GO:0000122", "GO:0010557", "GO:0006915", "GO:0042127", "GO:0045943", "GO:0043065", "GO:1901701", "GO:0043066", "GO:0042326", "GO:0032870", "GO:0071407", "GO:0071417")
# 1. volcano with the genes from geneset cluster 11
volc_distilled2(
data = volcano_T8,
gs_ids = "GO:0006915",
exp_thres = 1,
res_enrich = data$res_enrich,
add = NULL
)
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: ggrepel: 52 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
# ggsave("apopt_T8.pdf")
Generating a bar plot to indicate regulation strength of genesets involved in each cluster
# cluster 11
c11_enrich <- data$res_enrich %>%
select(gs_id, gs_description, z_score, gs_pvalue) %>%
filter(gs_id %in% c_11) %>%
arrange(desc(z_score))
c11_enrich$gs_description <- factor(
c11_enrich$gs_description,
levels = c11_enrich$gs_description[order(c11_enrich$z_score)])
p1 <- ggplot(data = c11_enrich, aes(y = gs_description, x = z_score)) +
geom_col() +
theme_minimal()
p1
# ggsave ("c11_T8_barplot.pdf")
# cluster 3
c3_enrich <- data$res_enrich %>%
select(gs_id, gs_description, z_score, gs_pvalue) %>%
filter(gs_id %in% c_3) %>%
arrange(desc(z_score))
c3_enrich$gs_description <- factor(
c3_enrich$gs_description,
levels = c3_enrich$gs_description[order(c3_enrich$z_score)])
p3 <- ggplot(data = c3_enrich, aes(y = gs_description, x = z_score)) +
geom_col() +
theme_minimal()
p3
# ggsave ("c3_T8_barplot.pdf")
To answer the question which genes are altered relevantly in both datasets (i.e. T8 and T11)
T11_reg <- volcano_T11 %>%
filter(log2FoldChange >= 1 | log2FoldChange <= -1) %>%
select(geneSymbol) %>%
as_vector() %>%
unname()
T8T11_reg <- volcano_T8 %>%
filter(geneSymbol %in% T11_reg) %>%
filter(log2FoldChange >= 1 | log2FoldChange <= -1) %>%
select(geneSymbol) %>%
as_vector() %>%
unname()
shared <- volcano_T8 %>%
filter(geneSymbol %in% T8T11_reg)
shared_T11 <- volcano_T11 %>%
filter(geneSymbol %in% T8T11_reg)
datatable(shared)
to plot this visually
shared_top <- shared %>%
arrange(desc(log2FoldChange)) %>%
mutate(n = row_number(),
nn = 1 + max(n) - n) %>%
ungroup() %>%
filter(n <= 25 | nn <= 8)
shared_top_T11 <- shared_T11 %>%
arrange(desc(log2FoldChange)) %>%
mutate(n = row_number(),
nn = 1 + max(n) - n) %>%
ungroup() %>%
filter(n <= 25 | nn <= 8)
shared_top$geneSymbol <- factor(
shared_top$geneSymbol,
levels = shared_top$geneSymbol[order(shared_top$log2FoldChange)])
shared_top_T11$geneSymbol <- factor(
shared_top_T11$geneSymbol,
levels = shared_top_T11$geneSymbol[order(shared_top_T11$log2FoldChange)])
ggplot(data = shared_top, mapping = aes(x = geneSymbol,y = log2FoldChange, fill = -log(padj))) +
geom_col() +
scale_fill_gradient(low="#2C4D9D", high="grey") +
theme(axis.text.x=element_text(color = "black", size=7, angle=90, vjust=.8, hjust=1)) +
theme(legend.title = element_blank()) +
coord_fixed(ratio = 1/1.2)
# ggsave("shared_T8.pdf")
ggplot(data = shared_top_T11, mapping = aes(x = geneSymbol,y = log2FoldChange, fill = (`adjusted P -log10`))) +
geom_col() +
scale_fill_gradient(low="#2C4D9D", high="grey") +
theme(axis.text.x=element_text(color = "black", size=7, angle=90, vjust=.8, hjust=1)) +
theme(legend.title = element_blank()) +
coord_fixed(ratio = 2/1)
# ggsave("shared_T11.pdf")
We can show these genesets also in an MDS plot
gs_mds(gtl = data,
n_gs = 100,
# mds_labels = 5,
similarity_measure = "kappa_matrix",
mds_k = 2,
gs_labels = c_3,
mds_colorby = "z_score",
plot_title = NULL)
# ggsave("MDS B cells.pdf")
gs_mds(gtl = data,
n_gs = 100,
# mds_labels = 5,
similarity_measure = "kappa_matrix",
mds_k = 2,
gs_labels = c_11,
mds_colorby = "z_score",
plot_title = NULL)
## Warning: ggrepel: 4 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
# ggsave("MDS apop.pdf")
gs_mds(gtl = data,
n_gs = 100,
mds_labels = 5,
similarity_measure = "kappa_matrix",
mds_k = 2,
mds_colorby = "z_score",
plot_title = NULL)
# ggsave("MDS GO.pdf")
Next, we might want to look at the interaction of these genesets in the form of a network analyis, including (as a bipartite graph) both genesets and genes.
ggs <- ggs_graph(gtl = data,
n_gs = 30)
ggs
## IGRAPH 9ffb326 UN-- 816 1413 --
## + attr: name (v/c), nodetype (v/c), shape (v/c), color (v/c), title
## | (v/c)
## + edges from 9ffb326 (vertex names):
## [1] defense response to virus--Adar defense response to virus--Akap1
## [3] defense response to virus--Apobec3 defense response to virus--Armc5
## [5] defense response to virus--Atad3a defense response to virus--Bcl2
## [7] defense response to virus--Bnip3 defense response to virus--Bst2
## [9] defense response to virus--C1qbp defense response to virus--Cd40
## [11] defense response to virus--Creb3 defense response to virus--Ddx1
## [13] defense response to virus--Ddx21 defense response to virus--Ddx58
## + ... omitted several edges
ggs %>%
visIgraph() %>%
visOptions(highlightNearest = list(enabled = TRUE,
degree = 1,
hover = TRUE),
nodesIdSelection = TRUE)
Finally, for the initial analysis, we can prepare an interaction network of genesets, where the gene sets are presented as circles, their diameter corresponding to the number of genes in the set, their color corresponding to the z-score for the differential expression. We will use the top 100 genesets for a diverse overview.
em <- GeneTonic::enrichment_map(gtl = data,
n_gs = 100,
color_by = "z_score")
em %>%
visIgraph() %>%
visOptions(highlightNearest = list(enabled = TRUE,
degree = 1,
hover = TRUE),
nodesIdSelection = TRUE)
As summary visualisation of the simplified GO data as a volcano plot
res_enrich_simplified <- gs_simplify(data$res_enrich,
gs_overlap = 0.7)
gs_volcano(res_enrich_simplified,
p_threshold = 0.05,
color_by = "aggr_score",
volcano_labels = 6,
scale_circles = 0.5,
plot_title = "top regulated genesets")
# ggsave("GO_volc.pdf")
and finally a simple output as dotplot for the most significantly regulated genesets. Plus a visualization of the top genesets and how the genes involved in their overexpression might overlap between the top regulated sets.
gs_summary_overview(data$res_enrich,
n_gs = 10,
p_value_column = "gs_pvalue",
color_by = "z_score")
gs_summary_heat(gtl = data,
n_gs = 10)
First, we generate the pca and plot the first two PCs, checking the scree plot.
vst <- vst(data$dds)
rld <- rlog(data$dds)
pcaplot(rld,intgroup = c("condition"),ntop = 1000,
pcX = 1, pcY = 2, title = "T8 I4 vs. GFP PCA on samples - PC1 vs PC2",
ellipse = TRUE)
ggsave("PCA.pdf")
## Saving 7 x 5 in image
We have a solid representation of the data with PC1 and PC2. Nonetheless, we could look at the scree plot to see what amount of information is to be gained from subsequent PCs.
pcaobj_RNA <- prcomp(t(assay(rld)))
pcascree(pcaobj_RNA,type="pev",
title="Proportion of explained proportion of variance")
Now, we check the loadings that explain the variance observed in PC1
hi_loadings(pcaobj_RNA,topN = 20, exprTable=counts(data$dds))
## N1_G1 N2_G2 N3_I1 N4_I2
## ENSMUSG00000037944 1237 1301 9438 10210
## ENSMUSG00000107705 258 374 2644 3086
## ENSMUSG00000078853 293 340 2701 3257
## ENSMUSG00000104713 15 17 629 694
## ENSMUSG00000020689 66 66 1129 1187
## ENSMUSG00000028037 24 25 618 713
## ENSMUSG00000020638 562 540 5499 6256
## ENSMUSG00000034634 17 18 746 896
## ENSMUSG00000034708 1800 1899 20494 20749
## ENSMUSG00000020641 6 5 717 708
## ENSMUSG00000038421 189 149 2524 2815
## ENSMUSG00000022584 1 7 1067 1170
## ENSMUSG00000035692 49 61 1684 1983
## ENSMUSG00000044468 8 15 1175 1135
## ENSMUSG00000021356 6238 5986 73249 69574
## ENSMUSG00000072620 93 121 2792 3205
## ENSMUSG00000044052 25 32 1546 1743
## ENSMUSG00000015340 10 5 977 941
## ENSMUSG00000030107 40 43 2204 2897
## ENSMUSG00000046805 34 15 2779 3011
## ENSMUSG00000109941 610 595 0 0
## ENSMUSG00000051726 1521 1442 161 177
## ENSMUSG00000069972 0 975 0 0
## ENSMUSG00000027562 13809 14705 3571 4131
## ENSMUSG00000062380 3609 3877 967 928
## ENSMUSG00000056481 2483 2286 652 747
## ENSMUSG00000030365 1759 1751 583 577
## ENSMUSG00000003484 790 763 193 244
## ENSMUSG00000024402 1177 1257 345 416
## ENSMUSG00000029576 332 380 65 59
## ENSMUSG00000059305 16323 18647 6417 7267
## ENSMUSG00000020973 4356 4026 1630 1685
## ENSMUSG00000026866 4618 5187 1907 1986
## ENSMUSG00000071713 1594 1461 591 592
## ENSMUSG00000026970 1457 1244 627 572
## ENSMUSG00000097411 858 625 171 360
## ENSMUSG00000031785 1494 1575 551 549
## ENSMUSG00000004105 350 391 83 118
## ENSMUSG00000026278 340 384 66 144
## ENSMUSG00000032741 1397 1252 530 506
hi_loadings(pcaobj_RNA, topN = 26, annotation = data$annotation_obj)
We already see, that the preB cell receptor components are an important factor as loadings of the PC1. However, we can now also go ahead and perform GO analysis of the loadings of PC1.
bg_ids <- rownames(data$dds)[rowSums(counts(data$dds)) > 0]
goquick <- limmaquickpca2go(rld,
pca_ngenes = 5000,
inputType = "ENSEMBL",
organism = "Mm",
loadings_ngenes = 50,
background_genes = bg_ids
)
# for a smooth interactive exploration, use DT::datatable
datatable(goquick$PC1$posLoad)
sessionInfo()
## R version 4.1.1 (2021-08-10)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur 10.16
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_GB.UTF-8/en_GB.UTF-8/en_GB.UTF-8/C/en_GB.UTF-8/en_GB.UTF-8
##
## attached base packages:
## [1] parallel stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] apeglm_1.14.0 igraph_1.2.8
## [3] enrichplot_1.12.3 pheatmap_1.0.12
## [5] enrichR_3.0 magrittr_2.0.1
## [7] visNetwork_2.1.0 pcaExplorer_2.18.0
## [9] GeneTonic_1.4.1 plotly_4.10.0
## [11] DOSE_3.18.3 clusterProfiler_4.0.5
## [13] org.Mm.eg.db_3.13.0 AnnotationDbi_1.54.1
## [15] biomaRt_2.48.3 DESeq2_1.32.0
## [17] SummarizedExperiment_1.22.0 Biobase_2.52.0
## [19] MatrixGenerics_1.4.3 matrixStats_0.61.0
## [21] GenomicRanges_1.44.0 GenomeInfoDb_1.28.4
## [23] IRanges_2.26.0 S4Vectors_0.30.2
## [25] BiocGenerics_0.38.0 RColorBrewer_1.1-2
## [27] ggrepel_0.9.1 cowplot_1.1.1
## [29] DT_0.19 rmarkdown_2.11
## [31] markdown_1.1 readxl_1.3.1
## [33] forcats_0.5.1 stringr_1.4.0
## [35] dplyr_1.0.7 purrr_0.3.4
## [37] readr_2.0.2 tidyr_1.1.4
## [39] tibble_3.1.6 ggplot2_3.3.5
## [41] tidyverse_1.3.1
##
## loaded via a namespace (and not attached):
## [1] rappdirs_0.3.3 SparseM_1.81 AnnotationForge_1.34.1
## [4] coda_0.19-4 pkgmaker_0.32.2 bit64_4.0.5
## [7] knitr_1.36 DelayedArray_0.18.0 data.table_1.14.2
## [10] KEGGREST_1.32.0 RCurl_1.98-1.5 doParallel_1.0.16
## [13] generics_0.1.1 RSQLite_2.2.8 shadowtext_0.0.9
## [16] bit_4.0.4 tzdb_0.2.0 webshot_0.5.2
## [19] xml2_1.3.2 lubridate_1.8.0 httpuv_1.6.3
## [22] assertthat_0.2.1 viridis_0.6.2 xfun_0.28
## [25] hms_1.1.1 jquerylib_0.1.4 TSP_1.1-11
## [28] evaluate_0.14 promises_1.2.0.1 fansi_0.5.0
## [31] progress_1.2.2 dendextend_1.15.2 dbplyr_2.1.1
## [34] Rgraphviz_2.36.0 DBI_1.1.1 geneplotter_1.70.0
## [37] htmlwidgets_1.5.4 ellipsis_0.3.2 crosstalk_1.2.0
## [40] backports_1.3.0 annotate_1.70.0 gridBase_0.4-7
## [43] vctrs_0.3.8 Cairo_1.5-12.2 cachem_1.0.6
## [46] withr_2.4.2 ggforce_0.3.3 vroom_1.5.5
## [49] bdsmatrix_1.3-4 treeio_1.16.2 prettyunits_1.1.1
## [52] cluster_2.1.2 ape_5.5 backbone_1.5.1
## [55] lazyeval_0.2.2 crayon_1.4.2 genefilter_1.74.1
## [58] labeling_0.4.2 pkgconfig_2.0.3 tweenr_1.0.2
## [61] seriation_1.3.1 nlme_3.1-153 rlang_0.4.12
## [64] lifecycle_1.0.1 miniUI_0.1.1.1 colourpicker_1.1.1
## [67] downloader_0.4 registry_0.5-1 filelock_1.0.2
## [70] BiocFileCache_2.0.0 GOstats_2.58.0 modelr_0.1.8
## [73] cellranger_1.1.0 polyclip_1.10-0 graph_1.70.0
## [76] rngtools_1.5.2 Matrix_1.3-4 aplot_0.1.1
## [79] reprex_2.0.1 base64enc_0.1-3 GlobalOptions_0.1.2
## [82] png_0.1-7 viridisLite_0.4.0 rjson_0.2.20
## [85] bitops_1.0-7 shinydashboard_0.7.2 Biostrings_2.60.2
## [88] blob_1.2.2 shape_1.4.6 rintrojs_0.3.0
## [91] qvalue_2.24.0 shinyAce_0.4.1 gridGraphics_0.5-1
## [94] scales_1.1.1 GSEABase_1.54.0 memoise_2.0.0
## [97] plyr_1.8.6 zlibbioc_1.38.0 threejs_0.3.3
## [100] compiler_4.1.1 scatterpie_0.1.7 bbmle_1.0.24
## [103] clue_0.3-60 cli_3.1.0 XVector_0.32.0
## [106] Category_2.58.0 patchwork_1.1.1 MASS_7.3-54
## [109] tidyselect_1.1.1 stringi_1.7.5 shinyBS_0.61
## [112] highr_0.9 emdbook_1.3.12 yaml_2.2.1
## [115] GOSemSim_2.18.1 locfit_1.5-9.4 grid_4.1.1
## [118] sass_0.4.0 fastmatch_1.1-3 tools_4.1.1
## [121] circlize_0.4.13 rstudioapi_0.13 foreach_1.5.1
## [124] gridExtra_2.3 farver_2.1.0 ggraph_2.0.5
## [127] digest_0.6.28 shiny_1.7.1 Rcpp_1.0.7
## [130] broom_0.7.10 later_1.3.0 shinyWidgets_0.6.2
## [133] httr_1.4.2 ComplexHeatmap_2.8.0 colorspace_2.0-2
## [136] rvest_1.0.2 XML_3.99-0.8 fs_1.5.0
## [139] topGO_2.44.0 splines_4.1.1 RBGL_1.68.0
## [142] yulab.utils_0.0.4 tidytree_0.3.5 expm_0.999-6
## [145] graphlayouts_0.7.1 ggplotify_0.1.0 xtable_1.8-4
## [148] jsonlite_1.7.2 ggtree_3.0.4 heatmaply_1.3.0
## [151] dynamicTreeCut_1.63-1 tidygraph_1.2.0 ggfun_0.0.4
## [154] R6_2.5.1 pillar_1.6.4 htmltools_0.5.2
## [157] mime_0.12 NMF_0.23.0 glue_1.5.0
## [160] fastmap_1.1.0 BiocParallel_1.26.2 bs4Dash_2.0.3
## [163] codetools_0.2-18 fgsea_1.18.0 mvtnorm_1.1-3
## [166] utf8_1.2.2 lattice_0.20-45 bslib_0.3.1
## [169] numDeriv_2016.8-1.1 curl_4.3.2 GO.db_3.13.0
## [172] limma_3.48.3 survival_3.2-13 munsell_0.5.0
## [175] DO.db_2.9 GetoptLong_1.0.5 GenomeInfoDbData_1.2.6
## [178] iterators_1.0.13 haven_2.4.3 reshape2_1.4.4
## [181] gtable_0.3.0 shinycssloaders_1.0.0